Music EDA and Similarity-based Recommender
In this notebook we will go through an in depth analysis of sound and how we can visualize, classify, understand it, and ultimately recommend similar sounding music.
- Introduction
- Purpose
- Download the data
- Explore Audio Data
- EDA ( Exploratory Data Analysis )
- Recommender System
Introduction
Why are we doing this?
Music. Experts have been trying for a long time to understand sound and what differentiates one song from another — how to visualize sound, and what makes one tone different from another.
In this notebook we will go through an in depth analysis of sound and how we can visualize, classify and ultimately understand it.
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn
import os
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.decomposition import PCA
import IPython.display as ipd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
Download the data
Audio Source - http://marsyas.info/downloads/datasets.html (mirror used below)
!wget http://opihi.cs.uvic.ca/sound/genres.tar.gz
!tar -xvf genres.tar.gz
!wget https://raw.githubusercontent.com/kcirerick/deep-music/master/features_30_sec.csv
# Root directory of the Colab environment where the dataset was extracted.
df = '/content'
print(list(os.listdir(f'{df}/genres/')))
#
# Load one pop track; librosa resamples to 22050 Hz mono by default.
y, sr = librosa.load(f'{df}/genres/pop/pop.00034.wav')
print('y:', y, '\n')
print('y shape:', np.shape(y), '\n')
print('Sample Rate (KHz):', sr, '\n')
# Duration in seconds = samples / sample rate (was a hard-coded 661794/22050,
# which silently breaks for any other file).
print('Check Len of the Audio:', np.shape(y)[0] / sr)
# Trim leading/trailing silence from the signal.
audio_file, _ = librosa.effects.trim(y)
# the result is a numpy ndarray
print('Audio File:', audio_file, '\n')
print('Audio File shape:', np.shape(audio_file))
plt.figure(figsize = (16, 6))
librosa.display.waveplot(y = audio_file, sr = sr, color = "#A300F9");
# Title fixed: the loaded file is pop.00034, not a rock track.
plt.title("Sound Waves in Pop 34", fontsize = 25);
# STFT parameters: window size and the stride between successive columns.
n_fft = 2048       # FFT window size
hop_length = 512   # audio frames between STFT columns (a common default)
# Short-time Fourier transform (STFT); keep only the magnitude.
stft_matrix = librosa.stft(audio_file, n_fft=n_fft, hop_length=hop_length)
D = np.abs(stft_matrix)
print('Shape of D object:', np.shape(D))
plt.figure(figsize=(16, 6))
plt.plot(D);
# This raw plot is hard to read: both the frequency axis and the amplitude
# ("color") axis still need a log / decibel transform — done in the next cell.
Spectrogram
- What is a spectrogram?
-> A spectrogram is a visual representation of the spectrum of frequencies of a signal as it varies with time. When applied to an audio signal, spectrograms are sometimes called sonographs, voiceprints, or voicegrams (wiki).
- Here we convert the frequency axis to a logarithmic one.
# Convert the amplitude spectrogram to a decibel scale, referenced to its peak.
DB = librosa.amplitude_to_db(D, ref=np.max)
# Render the spectrogram with a log-scaled frequency axis.
plt.figure(figsize=(16, 6))
librosa.display.specshow(DB, sr=sr, hop_length=hop_length,
                         x_axis='time', y_axis='log', cmap='cool')
plt.colorbar();
plt.title("pop 34", fontsize = 25);
# Mel spectrogram of a metal track, for visual comparison across genres.
y, sr = librosa.load(f'{df}/genres/metal/metal.00036.wav')
y, _ = librosa.effects.trim(y)
S = librosa.feature.melspectrogram(y, sr=sr)
# melspectrogram returns a POWER spectrogram, so power_to_db (not
# amplitude_to_db) is the correct dB conversion here.
S_DB = librosa.power_to_db(S, ref=np.max)
plt.figure(figsize = (16, 6))
librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length, x_axis = 'time', y_axis = 'log',
cmap = 'cool');
plt.colorbar();
plt.title("Metal Mel Spectrogram", fontsize = 23);
# Mel spectrogram of a classical track, to contrast with the metal example.
y, sr = librosa.load(f'{df}/genres/classical/classical.00036.wav')
y, _ = librosa.effects.trim(y)
S = librosa.feature.melspectrogram(y, sr=sr)
# melspectrogram returns a POWER spectrogram, so power_to_db (not
# amplitude_to_db) is the correct dB conversion here.
S_DB = librosa.power_to_db(S, ref=np.max)
plt.figure(figsize = (16, 6))
librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length, x_axis = 'time', y_axis = 'log',
cmap = 'cool');
plt.colorbar();
plt.title("Classical Mel Spectrogram", fontsize = 23);
# Count zero crossings in the trimmed pop signal — a rough proxy for
# noisiness / percussiveness of the waveform.
zero_crossings = librosa.zero_crossings(audio_file, pad=False)
print(sum(zero_crossings))
# Separate the signal into harmonic and percussive components and overlay
# both on one figure (purple = harmonic, orange = percussive).
y_harm, y_perc = librosa.effects.hpss(audio_file)
plt.figure(figsize=(16, 6))
plt.plot(y_harm, color='#A300F9');
plt.plot(y_perc, color='#FFB100');
# Estimate the global tempo (BPM). NOTE(review): this uses `y`, which at this
# point holds the classical clip loaded above, not `audio_file` — confirm
# that is intended.
tempo, _ = librosa.beat.beat_track(y, sr=sr)
tempo
# Spectral centroid per frame: the "center of mass" of the spectrum.
# [0] unwraps the (1, n_frames) array into a flat vector.
spectral_centroids = librosa.feature.spectral_centroid(audio_file, sr=sr)[0]
# Shape is a vector
print('Centroids:', spectral_centroids, '\n')
print('Shape of Spectral Centroids:', spectral_centroids.shape, '\n')
# Computing the time variable for visualization
frames = range(len(spectral_centroids))
# Converts frame counts to time (seconds)
# NOTE: uses librosa's default sr/hop_length; `t` is reused by later plot cells.
t = librosa.frames_to_time(frames)
print('frames:', frames, '\n')
print('t:', t)
def normalize(x, axis=0):
    """Min-max scale *x* along *axis* so its values lie in [0, 1]."""
    return preprocessing.minmax_scale(x, axis=axis)
# Overlay the (normalized) spectral centroid curve on top of the waveform.
plt.figure(figsize = (16, 6))
librosa.display.waveplot(audio_file, sr=sr, alpha=0.4, color = '#A300F9');
plt.plot(t, normalize(spectral_centroids), color='#FFB100');
# Spectral rolloff: frequency below which a fixed fraction of the total
# spectral energy lies, per frame ([0] flattens the (1, n) array).
spectral_rolloff = librosa.feature.spectral_rolloff(audio_file, sr=sr)[0]
# The plot
plt.figure(figsize = (16, 6))
librosa.display.waveplot(audio_file, sr=sr, alpha=0.4, color = '#A300F9');
plt.plot(t, normalize(spectral_rolloff), color='#FFB100');
# Mel-frequency cepstral coefficients: shape (n_mfcc, n_frames).
mfccs = librosa.feature.mfcc(audio_file, sr=sr)
print('mfccs shape:', mfccs.shape)
#Displaying the MFCCs:
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
Data needs to be scaled:
# Standardize each coefficient across time (axis=1): zero mean, unit variance.
# NOTE: this overwrites the raw `mfccs` array in place.
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
print('Mean:', mfccs.mean(), '\n')
print('Var:', mfccs.var())
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
# Coarser hop for the chroma plot. NOTE: this rebinds the global `hop_length`
# (previously 512) for everything after this cell.
hop_length = 5000
# Chromogram
# Energy in each of the 12 pitch classes over time: shape (12, n_frames).
chromagram = librosa.feature.chroma_stft(audio_file, sr=sr, hop_length=hop_length)
print('Chromogram shape:', chromagram.shape)
plt.figure(figsize=(16, 6))
librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma', hop_length=hop_length, cmap='coolwarm');
# Load the precomputed 30-second feature table (one row per track).
data = pd.read_csv('features_30_sec.csv')
data.head()
# Keep only the per-feature mean columns for the correlation analysis.
spike_cols = [col for col in data.columns if 'mean' in col]
corr = data[spike_cols].corr()
# Generate a mask for the upper triangle.
# np.bool was deprecated in NumPy 1.20 and removed in 1.24; the builtin
# bool is the correct dtype here.
mask = np.triu(np.ones_like(corr, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(16, 11));
# Generate a custom diverging colormap
cmap = sns.diverging_palette(0, 25, as_cmap=True, s = 90, l = 45, n = 5)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
square=True, linewidths=.5, cbar_kws={"shrink": .5})
plt.title('Correlation Heatmap (for the MEAN variables)', fontsize = 25)
plt.xticks(fontsize = 10)
plt.yticks(fontsize = 10);
# Per-genre tempo distribution: one box per genre label.
x = data[["label", "tempo"]]
f, ax = plt.subplots(figsize=(16, 9));
sns.boxplot(x = "label", y = "tempo", data = x, palette = 'husl');
plt.title('BPM Boxplot for Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Genre", fontsize = 15)
plt.ylabel("BPM", fontsize = 15)
# Drop the filename column, then split features (X) from the genre label (y).
data = data.iloc[0:, 1:]
y = data['label']
X = data.loc[:, data.columns != 'label']
#### NORMALIZE X ####
# Min-max scale every feature column into [0, 1] before PCA.
cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
X = pd.DataFrame(min_max_scaler.fit_transform(X), columns=cols)
#### PCA 2 COMPONENTS ####
# Project the scaled features down to two principal components.
pca = PCA(n_components=2)
principalDf = pd.DataFrame(
    data=pca.fit_transform(X),
    columns=['principal component 1', 'principal component 2'],
)
# Re-attach the genre label for the scatter plot.
finalDf = pd.concat([principalDf, y], axis = 1)
pca.explained_variance_ratio_
# roughly 45% of the variance is captured by the first two components
plt.figure(figsize=(16, 9))
sns.scatterplot(x="principal component 1", y="principal component 2",
                data=finalDf, hue="label", alpha=0.7, s=100);
plt.title('PCA on Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Principal Component 1", fontsize = 15)
plt.ylabel("Principal Component 2", fontsize = 15)
plt.savefig("PCA Scattert.jpg")
# Re-read the feature table, indexed by filename so each row is addressable
# by its track name.
data = pd.read_csv('features_30_sec.csv', index_col='filename')
# Keep the genre labels aside (their index carries the filenames).
labels = data[['label']]
# Drop the non-feature columns before computing similarity.
data = data.drop(columns=['length','label'])
data.head()
# Standardize every feature: zero mean, unit variance.
data_scaled=preprocessing.scale(data)
print('Scaled data type:', type(data_scaled))
# Pairwise cosine similarity between every pair of tracks (n_tracks x n_tracks).
similarity = cosine_similarity(data_scaled)
print("Similarity shape:", similarity.shape)
# Wrap the matrix in a DataFrame with filenames as both the row index and
# the column names, so lookups read naturally by track.
sim_df_names = pd.DataFrame(similarity, index=labels.index, columns=labels.index)
sim_df_names.head()
def find_similar_songs(name, top=5):
    """Print and return the tracks most similar to *name*.

    Parameters
    ----------
    name : str
        Filename index of the query track in ``sim_df_names``.
    top : int, optional
        Number of best matches to report (default 5, matching the
        original behavior).

    Returns
    -------
    pandas.Series
        Cosine-similarity scores of the best matches, highest first.
    """
    # Rank every track by cosine similarity to the query, best first.
    series = sim_df_names[name].sort_values(ascending = False)
    # Remove cosine similarity == 1 (songs will always have the best match with themselves)
    series = series.drop(name)
    matches = series.head(top)
    # Display the top matches
    print("\n*******\nSimilar songs to ", name)
    print(matches)
    # Returning the matches lets callers use the result programmatically;
    # previous callers that ignored the (None) return are unaffected.
    return matches
# Query the recommender for the five tracks most similar to rock.00067.
find_similar_songs('rock.00067.wav')
# Listen to the query track itself...
ipd.Audio(f'{df}/genres/rock/rock.00067.wav')
# ...and to a sample of the recommended tracks for a subjective check.
ipd.Audio(f'{df}/genres/rock/rock.00068.wav')
ipd.Audio(f'{df}/genres/rock/rock.00065.wav')
ipd.Audio(f'{df}/genres/metal/metal.00065.wav')
ipd.Audio(f'{df}/genres/metal/metal.00044.wav')
ipd.Audio(f'{df}/genres/metal/metal.00041.wav')